## robustness: HS6 khandelwal
## quality downgrading generalization

### =========================================================================
### PRELIMINARIES
### =========================================================================

require(data.table)
require(lfe)

require(ggplot2)
require(stargazer)

require(geosphere)

### =========================================================================
### DATA INPUT
### =========================================================================

## CODE A VARIABLE basedirectory TO BE EQUAL TO THE GENERALIZATION DATA FOLDER
## e.g., baseline <- 'D:\\Dropbox\\generalization'

basedirectory <- 'input a file path here'
setwd(basedirectory)



## -- -- -- -- -- --
## HS6 trade data

setwd(paste(basedirectory, '\\trade data\\', sep=''))
files <- list.files()

lr <- lapply(1:length(files), function(x) fread(files[x]))
lr <- lapply(1:length(files), function(x) lr[[x]][, c(3, 8:10, 12:13, 22, 23, 30, 32), with=F])

for (i in 1:length(lr)){lr[[i]][[9]] <- as.numeric(lr[[i]][[9]])}

raw <- rbindlist(lr)
setnames(raw, c('period', 'flow', 'country_code', 'country', 'partner_code', 'partner', 'hs6', 'commodity', 'weight', 'value'))

raw$date <- as.Date(paste(raw$period, '01', sep='-'), format='%Y%m-%d')
raw$quarter <- as.Date(paste(year(raw$date), 3*ceiling(month(raw$date)/3), '01', sep='-'))

raw$value <- as.numeric(raw$value)
raw$weight <- as.numeric(raw$weight)

vdat <- raw[, by=.(quarter, partner, partner_code, hs6, commodity), 
	list(value=sum(value,na.rm=T), weight=sum(weight,na.rm=T), unit_val=sum(value,na.rm=T)/sum(weight,na.rm=T))]
vdat <- vdat[unit_val < Inf & partner != 'World' & partner != 'Other Asia, nes' & partner != 'Other Europe, nes', ]
vdat$hs6 <- ifelse(nchar(vdat$hs6) < 6, paste('0', vdat$hs6, sep=''), vdat$hs6)

vdat$year <- year(vdat$quarter)

## -- -- -- -- -- --
## instruments for price: 
##	ER (ref: zhu and tomasi 2020; khandelwal, 2010)
##	oil price x distance to Moscow (khandelwal, 2010)
##	tariffs (zhu and tomasi, 2020; 


## -- -- --
## exchange rate - 
## 	from IMF, SDRs per currency unit (must take SDR/USD divided by SDR/curr to get curr/USD)
## 	big ones missing: belarus, ukraine, turkey, hong kong, vietnam, ecuador (USD), serbia, uzbekistan, argentina, paraguay, bulgaria

setwd(paste(basedirectory, '\\ER data', sep=''))
erraw <- fread('IMF_ER_data.csv')
erraw <- melt(erraw, id.vars = c('Date'), measure.vars = names(erraw)[2:ncol(erraw)])
erraw <- erraw[Date != '', ]
setnames(erraw, c('date', 'partner', 'perSDR'))
erraw[, by=.(date), 'perusd' := list(perSDR[partner == 'United States of America']/perSDR)]

euro <- c('Austria', 'Belgium', 'Cyprus', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Ireland', 'Italy', 
		'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Portugal', 'Slovakia', 'Slovenia', 'Spain')
temp <- rbindlist(lapply(1:length(euro), function(x) erraw[partner == 'Euro', ]))
temp$partner <- sort(rep(euro, length(unique(erraw$date))))

erraw <- rbind(erraw, temp)
erraw$perSDR <- NULL
erraw$date <- as.Date(erraw$date, format='%d-%b-%Y')

## 	from investing.com, for those missing from IMF (e.g. https://ca.investing.com/currencies/usd-bgn-historical-data)
filelist <- list.files()
filelist <- filelist[grepl('Historical Data', filelist)]
countries <- c('Argentina', 'Bulgaria', 'Belarus', 'China, Hong Kong SAR', 'Paraguay', 'Serbia', 'Turkey', 'Ukraine', 'Uzbekistan', 'Viet Nam')

erraw2 <- rbindlist(lapply(1:length(filelist), function(x) data.table(fread(filelist[x]), partner=countries[x])))
erraw2 <- erraw2[, c(1,2,7), with=F]
setnames(erraw2, c('date', 'perusd', 'partner'))
erraw2$date <- as.Date(erraw2$date, format='%b %d, %Y')

erdat <- rbind(erraw, erraw2)
erdat$perusd <- as.numeric(gsub(',', '', erdat$perusd))
erdat$quarter <- as.Date(paste(year(erdat$date), 3*ceiling(month(erdat$date)/3), '01', sep='-'))
erdat[, by=.(date), 'rubper' := list(perusd[partner == 'Russian Federation']/perusd)]
erdat <- erdat[, by=.(quarter, partner), list(perusd = mean(perusd, na.rm=T), rubper = mean(rubper, na.rm=T))]

erdat <- erdat[order(partner, quarter), ]
erdat[, by=.(partner), 'rubper_l2' := list(c(NA, NA, rubper[1:(.N-2)]))]

## -- -- --
## oil price x distance between capitals

setwd(paste(basedirectory, '\\oil data', sep=''))
oilraw <- fread('Cushing_OK_WTI_Spot_Price_FOB.csv')
setnames(oilraw, c('date', 'oilprice'))
oilraw$date <- as.Date(paste('01', oilraw$date, sep='-'), format='%d-%b-%y')

oildat <- oilraw
oildat$quarter <- as.Date(paste(year(oildat$date), 3*ceiling(month(oildat$date)/3), '01', sep='-'))
oildat <- oildat[, by=.(quarter), list(oilprice = mean(oilprice, na.rm=T))]
oildat <- oildat[quarter >= as.Date('2012-01-01') & quarter <= as.Date('2015-12-31'), ]


distraw <- fread('capital_city_latlong.csv')
distraw <- distraw[, c(2,4,5), with=F]
setnames(distraw, c('partner', 'lat', 'long'))
distraw$partner[distraw$partner == 'China' & distraw$lat < 25] <- 'China, Hong Kong SAR'

distraw$dist <- sapply(1:nrow(distraw), function(x) distHaversine(c(distraw$long[x], distraw$lat[x]), 
											c(distraw$long[distraw$partner == 'Russia'], 
												distraw$lat[distraw$partner == 'Russia'])))
ddat <- distraw[, .(partner, dist)]
ddat$dist <- ddat$dist/1000000							## distance in 000's of kilometers
ddat$partner[ddat$partner == 'Korea North'] <- 'Rep. of Korea'
ddat$partner[ddat$partner == 'USA'] <- 'United States of America'
ddat$partner[ddat$partner == 'UK'] <- 'United Kingdom'
ddat$partner[ddat$partner == 'Czech Republic'] <- 'Czech Rep.'
ddat$partner[ddat$partner == 'Vietnam'] <- 'Viet Nam'
ddat$partner[ddat$partner == 'Serbia and Montenegro'] <- 'Serbia'

oddat <- rbindlist(lapply(1:nrow(ddat), function(x) data.table(oildat, partner = ddat$partner[x], dist = ddat$dist[x])))
oddat <- oddat[, by=.(quarter, partner), list(price_dist = oilprice*dist)]


## -- -- --
## WTO tariffs (MFN, at HS6 level)

setwd(paste(basedirectory, '\\tariff data\\raw', sep=''))
wtoraw <- fread('WtoData_20200729153100.csv')

wtodat <- wtoraw[, c(20, 12, 24), with=F]
setnames(wtodat, c('year', 'hs6', 'pcttariff'))
wtodat$hs6 <- ifelse(nchar(wtodat$hs6) == 5, paste('0', wtodat$hs6, sep=''), wtodat$hs6)


## -- -- -- -- -- --
## country population to proxy for "hidden varieties"

setwd(paste(basedirectory, '\\pop data', sep=''))
popraw <- fread('country_populations.csv')

popdat <- melt(popraw, id.vars = 'V1', measure.vars = paste('V', 5:64, sep=''))
setnames(popdat, c('partner', 'year', 'pop'))
popdat$year <- as.numeric(gsub('V', '', popdat$year)) + 1955
popdat <- popdat[partner != 'Country Name' & year >= 2012 & year <= 2016, ]

popdat$partner[popdat$partner == 'Czech Republic'] <- 'Czech Rep.'
popdat$partner[popdat$partner == 'United States'] <- 'United States of America'
popdat$partner[popdat$partner == 'Slovak Republic'] <- 'Slovakia'
popdat$partner[popdat$partner == 'Vietnam'] <- 'Viet Nam'
popdat$partner[popdat$partner == 'Hong Kong SAR, China'] <- 'China, Hong Kong SAR'
popdat$partner[popdat$partner == 'Korea, Rep.'] <- 'Rep. of Korea'
popdat$partner[popdat$partner == 'Iran, Islamic Rep.'] <- 'Iran'
popdat$partner[popdat$partner == 'Venezuela, RB'] <- 'Venezuela'

## impute to quarterly level
popdat[, by=.(partner), 'fpop' := list(c(pop[2:.N], NA))]
popdat <- popdat[!is.na(fpop), by=.(partner, year), 
			list(quarter = as.Date(paste(year, c(3,6,9,12), '01', sep='-'), format='%Y-%m-%d'), 
				pop = pop + 0.25*(0:3)*as.numeric(fpop - pop))]
popdat$pop <- as.numeric(popdat$pop)


## -- -- -- -- -- --
## data to recover outside option? not necessary
##	explained in detail below but essentially, outside option is hs4 x time varying, and will have hs4 x time varying dummy variable
##	will affect nest share coefficients however

### =========================================================================
### MERGED DATA FOR QUALITY REGRESSION
### =========================================================================

## basic data + exchange rate data
dat <- merge(vdat, erdat, by=c('partner', 'quarter'))

## basic + oilprice x distance data
dat <- merge(dat, oddat, by=c('partner', 'quarter'))

## basic + tariff data
dat <- merge(dat, wtodat, by=c('year', 'hs6'), all.x=T)

## basic + population data
dat <- merge(dat, popdat, by=c('year', 'quarter', 'partner'))


### =========================================================================
### CONSTRUCTING VARIABLES, INCLUDING QUALITY
### =========================================================================

## idea: belarus currency stays roughly similar to Russian
## 	differential change in average quality of imports for BEL vs. RoW?

## thought experiment: using reported unit values in USD to proxy for quality:
##	what if no reallocation?
## 		belarus: small delvauation rel to USD, unit vals (in USD) go down, looks like a reduction in quality
## 		US: no devaluation rel to USD, unit vals (in USD) do not go down, looks like quality remains high
##	only reallocation would lead to a finding of quality downgrading, but would need to "fight against" the exchange rate movement


## recovering quality:
## basic variables for khandelwal regression:							(vdat)
##	hs6 x country x quantity x quarter / total market (share)
## 	modification, instead of share: hs6 x country x quantity x quarter (quantity)
##	hs4 x quarter (outside option)
##	hs6 x country x quantity x quarter / hs6 x quantity x quarter (nest share)
##	hs6 x country x unitval x quarter (price proxy)
##	country x quarter x population (hidden varieties proxy) 
## instruments:
##	country x quarter exchange rate (price)						 	(erdat)
##	country x quarter oil price interacted with dist to Moscow (price)		(oddat)
##	country x quarter MFN tariffs (price)							(wtodat)
##	country x quarter x #hs6 varieties (nest share)						(vdat)
##	quarter x hs6 x number of countries (nest share)					(vdat)
##	

## quality reallocation regressions: 
##	because of omission of outside option, only within-quarter relative quality comparisons are valid
##	idea is that post devaluation, relative reallocation away from high quality for imports from countries with a larger ER devaluation
##	hs4 x quarter level dummies, to take care of differences in hs4 categories


## constructing variables

dat$hs4 <- substr(dat$hs6, 1, 4)
dat$hs2 <- substr(dat$hs6, 1, 2)
hs4s <- sort(unique(dat$hs4))
dat[, by=.(hs6, quarter), 'ns' := list(weight/sum(weight))]					## nest share
dat[, by=.(partner, quarter), 'num_hs6_vars' := list(length(unique(hs6)))]		## instrument for nest share
dat[, by=.(hs6, quarter), 'num_partners' := list(length(unique(partner)))]		## instrument for nest share

## -- -- --
## fixed effects regressions with flexible price and nest share coefficients (as in khandelwal (2010))

dat$partner_hs6 <- paste(dat$partner, dat$hs6, sep='_')
dat$hs4_quarter <- paste(dat$hs4, dat$quarter, sep='_')

dat$lns <- log(dat$ns)
dat$lnum_hs6_vars <- log(dat$num_hs6_vars)
dat$lnum_partners <- log(dat$num_partners)

hs2s <- sort(unique(dat$hs2))
fe2.l <- lapply(1:length(hs2s), function(x) 0)
fail <- rep(1, length(hs2s))
for (i in 1:length(hs2s)){
	fe2.l[[i]] <- tryCatch(felm(log(weight) ~ log(pop) | partner_hs6 + hs4_quarter | 
			(unit_val | lns ~ rubper + price_dist + pcttariff + lnum_hs6_vars + lnum_partners) | partner, data=dat[hs2==hs2s[i],]), 
			error = function(e) NA)
	fail[i] <- sum(is.na(fe2.l[[i]]))
}


## -- -- --
## recover the quality residuals: fixed effects + estimated ehats (fe0$residuals, or fe1$iv.residuals)

a.l <- lapply(1:length(fe2.l), function(x) tryCatch(getfe(fe2.l[[x]]), error = function(e) NA))
for (x in 1:length(a.l)){a.l[[x]]$idx <- tryCatch(as.character(a.l[[x]]$idx), error = function(e) NA)}
b.l <- lapply(1:length(a.l), function(x) tryCatch(strsplit(a.l[[x]]$idx, '_'), error = function(e) NA))
for (i in 1:length(a.l)){
	a.l[[i]]$f1 <- sapply(1:length(b.l[[i]]), function(x) b.l[[i]][[x]][1])
	a.l[[i]]$f2 <- sapply(1:length(b.l[[i]]), function(x) b.l[[i]][[x]][2])
}
a1.l <- a2.l <- lapply(1:length(a.l), function(x) '')
for (i in 1:length(a.l)){
	a.l[[i]] <- data.table(a.l[[i]])
	a1.l[[i]] <- tryCatch(a.l[[i]][grepl('partner', fe), .(idx, effect)], error = function(e) NA)
	a2.l[[i]] <- tryCatch(a.l[[i]][grepl('quarter', fe), .(idx, effect)], error = function(e) NA)
	tryCatch(setnames(a1.l[[i]], c('partner_hs6', 'fe_ch')), error = function(e) NA)
	tryCatch(setnames(a2.l[[i]], c('hs4_quarter', 'fe_Ht')), error = function(e) NA)
}

fedat4 <- dat[!is.na(pcttariff), .(year, quarter, hs6, hs4, hs2, partner, partner_hs6, hs4_quarter)]
fedat4.l <- lapply(1:length(a.l), function(x) NULL)
for (i in 1:length(a.l)){
	temp <- fedat4[hs2 == hs2s[i], ]
	if(!is.na(a1.l[[i]])){
		temp$fe_res <- fe2.l[[i]]$iv.residuals
		temp <- merge(temp, a1.l[[i]], by=c('partner_hs6'))
		temp <- merge(temp, a2.l[[i]], by=c('hs4_quarter'))
		temp$Q <- temp$fe_ch + temp$fe_Ht + temp$fe_res
		fedat4.l[[i]] <- temp
	}
}
fedat4 <- rbindlist(fedat4.l)


### =========================================================================
### QUALITY REALLOCATION REGRESSION
### =========================================================================

## if country didn't have a big concurrent devaluation, then imports became relatively more expensive
##	would then expect quality downgrading
##	i.e., if rubles per unit currency increase, then expect a reduction in average quality
## if country did have a big devaluation, then imports became relatively less expensive
##	would then expect no quality downgrading
##	i.e., if rubles per unit currency do not increase, then expect no reduction in average quality


## -- -- --
## USING THE FULL SET OF INSTRUMENTS PLUS FLEXIBLE HS2 COEFS, AND STRIPPING OUT SANCTIONED HS2 CATEGORIES

sanctions <- c(c('Australia', 'United States of America', 'Canada', 'Norway'), 
			c('Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Rep.', 'Denmark', 'Estonia', 'Finland', 'France', 
			'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 
			'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden'))
sanctions_hs2 <- c('02', '03', '04', '07', '08', '16', '19', '21')


fedat4$hs4_partner <- paste(fedat4$hs4, fedat4$partner, sep='_')
fedat4$hs6_partner <- paste(fedat4$hs6, fedat4$partner, sep='_')

fedat4$hs6_quarter <- paste(fedat4$hs6, fedat4$quarter, sep='_')
fedat4$hs2 <- substr(fedat4$hs6, 1, 2)
fedat4 <- merge(fedat4, erdat, by=c('quarter', 'partner'))

qr2fe1 <- felm(Q ~ log(rubper) | hs4_quarter + hs4_partner | 0 | hs4_partner, 
		data=fedat4[!(partner %in% sanctions & hs2 %in% sanctions_hs2) & partner != 'Ukraine', ])
qr2fe2 <- felm(Q ~ log(rubper_l2) | hs4_quarter + hs4_partner | 0 | hs4_partner, 
		data=fedat4[!(partner %in% sanctions & hs2 %in% sanctions_hs2) & partner != 'Ukraine', ])

qr2fe3 <- felm(Q ~ log(rubper) | hs4_quarter + hs6_partner | 0 | hs4_partner, 
		data=fedat4[!(partner %in% sanctions & hs2 %in% sanctions_hs2) & partner != 'Ukraine', ])
qr2fe4 <- felm(Q ~ log(rubper_l2) | hs4_quarter + hs6_partner | 0 | hs4_partner, 
		data=fedat4[!(partner %in% sanctions & hs2 %in% sanctions_hs2) & partner != 'Ukraine', ])			## replaced sanctions2 (nonexistant) with sanctions

stargazer(qr2fe1, qr2fe2, qr2fe3, qr2fe4, type='text')

### =========================================================================
### OUTPUT
### =========================================================================

stargazer(qr2fe1, qr2fe2, qr2fe3, qr2fe4, 
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('HS4 $\\times$ Country FE', '\\checkmark', '\\checkmark', '', ''),
					c('HS4 $\\times$ Season FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('HS6 $\\times$ Country FE', '', '', '\\checkmark', '\\checkmark')
					))









##